0: Preparation

Defining the input and output files

# Clean the working environment
rm(list = ls())

knitr::opts_chunk$set(echo = TRUE)


####################################  
# Defining the input files
#################################### 

Selection_strength_test_dir <- Sys.getenv("Selection_strength_test_dir")

Sweep_test_dir <-  Sys.getenv("Sweep_test_dir")

############### 
## Empirical ###
###############

### ROH hotspots ###
Empirical_data_ROH_hotspots_dir  <- Sys.getenv("Empirical_data_ROH_hotspots_dir")
Empirical_data_autosome_ROH_freq_dir <- Sys.getenv("Empirical_data_autosome_ROH_freq_dir")
### Inbreeding coefficient ###

Empirical_data_F_ROH_dir  <- Sys.getenv("Empirical_data_F_ROH_dir")

### Expected Heterozygosity distribution ###
Empirical_data_H_e_dir <- Sys.getenv("Empirical_data_H_e_dir")

############### 
## Simulated ###
###############

### Causative variant ###

Selection_causative_variant_windows_dir <- Sys.getenv("Selection_causative_variant_windows_dir")
variant_freq_plots_dir <- Sys.getenv("variant_freq_plots_dir")
variant_position_dir <- Sys.getenv("variant_position_dir")



### ROH hotspots ###
Neutral_model_ROH_hotspots_dir <- Sys.getenv("Neutral_model_ROH_hotspots_dir")
Neutral_model_autosome_ROH_freq_dir <- Sys.getenv("Neutral_model_autosome_ROH_freq_dir")

Selection_model_ROH_hotspots_dir  <- Sys.getenv("Selection_model_ROH_hotspots_dir")
Selection_model_autosome_ROH_freq_dir <- Sys.getenv("Selection_model_autosome_ROH_freq_dir")

### Inbreeding coefficient ###
Neutral_model_F_ROH_dir  <- Sys.getenv("Neutral_model_F_ROH_dir")
Selection_model_F_ROH_dir  <- Sys.getenv("Selection_model_F_ROH_dir")

### Expected Heterozygosity distribution ###
Neutral_model_H_e_dir <- Sys.getenv("Neutral_model_H_e_dir")
Selection_model_H_e_dir <- Sys.getenv("Selection_model_H_e_dir")


histogram_line_sizes <- 3
empirical_data_color <- "darkgreen"
neutral_model_color <- "blue" 
selection_model_color <- "purple"


# Sys.getenv()  

# # Set the working directory for notebook chunks
# knitr::opts_knit$set(root.dir = output_dir_sweep_test)

Loading libraries

library(knitr)
## Warning: package 'knitr' was built under R version 4.3.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.1

Causative variant

Fixation time

# Function to determine the number of rows until fixation is reached
time_to_fixation <- function(data, threshold = 0.9) {
  # Find the index of the first row where the allele frequency is 0.9 or higher
  fixation_index <- which(data$allele_frequency >= threshold)[1]
  
  # Return the number of rows until fixation is reached
  return(fixation_index - 1)
}
Selection_coefficient Avg_Fixation_time Min_fixation_time Max_fixation_time
2 s01_chr3 33.2 28 36
1 s005_chr3 33.0 29 39
3 s02_chr3 32.6 29 37
4 s03_chr3 27.2 23 30
6 s05_chr3 20.8 15 30
5 s04_chr3 20.4 14 31
7 s06_chr3 12.0 10 14
8 s07_chr3 10.8 9 14
9 s08_chr3 7.4 6 9

Variant position

Window lengths

Causative variant Summary

Selection_coefficient Avg_Length_Mb Min_freq Max_freq Avg_window_freq Avg_freq_variant_100k_window
4 s03_chr3 3.94 0.48 1.00 0.8389348 0.840
2 s01_chr3 5.64 0.28 0.96 0.6922190 0.676
7 s06_chr3 5.92 0.38 1.00 0.8641287 0.872
6 s05_chr3 6.02 0.56 1.00 0.8995497 0.912
3 s02_chr3 6.06 0.40 0.96 0.7634277 0.776
9 s08_chr3 6.58 0.32 1.00 0.7996209 0.804
5 s04_chr3 6.72 0.54 1.00 0.8163600 0.820
1 s005_chr3 10.44 0.40 0.98 0.7454841 0.656
8 s07_chr3 23.44 0.14 1.00 0.8795011 0.844

Bootstrap confidence interval function

Confidence level <=> konfidensgrad

# Function to calculate bootstrap confidence intervals
bootstrap_confidence_interval_fun <- function(observed_data, n_bootstraps = 1000, confidence_level = 0.95) {
  
  # Function to calculate the mean for each bootstrap sample
  compute_bootstrap_mean_fun <- function(observed_data_urn) {
    bootstrap_dataset <- sample(observed_data_urn, replace = TRUE)
    bootstrap_estimate <- mean(bootstrap_dataset)
    return(bootstrap_estimate)
  }
  
  # Perform bootstrap resampling
  bootstrap_point_estimates <- replicate(n_bootstraps, compute_bootstrap_mean_fun(observed_data))
  
  # Calculate the percentiles for the confidence interval
  alpha <- (1 - confidence_level) / 2
  confidence_interval_lower_bound <- quantile(bootstrap_point_estimates, alpha) # 2.5th percentile
  confidence_interval_upper_bound <- quantile(bootstrap_point_estimates, 1 - alpha) # 97.5th percentile
  
  # Return the confidence interval
  return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
}

1: ROH-Hotspots

1.1 : Autosome ROH-frequencies

1.1.1 : Empirical - ROH frequency

1.1.2: Neutral Model - ROH frequency

1.1.3: Selection Model

1.4 ROH-hotspot threshold summary

## ROH-hotspot selection testing results://n
Model Avg_ROH_hotspot_threshold
Neutral 0.416
selection_model_s02_chr3 0.452
selection_model_s03_chr3 0.456
selection_model_s005_chr3 0.476
selection_model_s01_chr3 0.496
selection_model_s05_chr3 0.496
selection_model_s04_chr3 0.532
selection_model_s08_chr3 0.564
selection_model_s07_chr3 0.568
selection_model_s06_chr3 0.600
Empirical 0.750

2: Inbreeding coefficient

2.1 Empirical data

## Overall Average Avg_F_ROH for  german_shepherd : 0.2750777 //n

2.2 Neutral Model

## Point estimate F_ROH across all 20 simulations: 0.3411836 //n
## [1] "Bootstrap 95% Confidence Interval: [0.299992136, 0.38932096]"

2.3 Selection Model

## Point estimate F_ROH across all 20 simulations for  selection_model_s005_chr3 : 0.3851778 //n[1] "Bootstrap 95% Confidence Interval: [0.36704765, 0.40664288]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s01_chr3 : 0.3901095 //n[1] "Bootstrap 95% Confidence Interval: [0.32800948, 0.452287203]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s02_chr3 : 0.389057 //n[1] "Bootstrap 95% Confidence Interval: [0.34961284, 0.41288596]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s03_chr3 : 0.3750411 //n[1] "Bootstrap 95% Confidence Interval: [0.34084448, 0.421364716]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s04_chr3 : 0.4344734 //n[1] "Bootstrap 95% Confidence Interval: [0.39021996, 0.47333448]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s05_chr3 : 0.4034206 //n[1] "Bootstrap 95% Confidence Interval: [0.36243468, 0.44244412]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s06_chr3 : 0.485658 //n[1] "Bootstrap 95% Confidence Interval: [0.41431812, 0.55071608]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s07_chr3 : 0.4494377 //n[1] "Bootstrap 95% Confidence Interval: [0.426247766, 0.483897177]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s08_chr3 : 0.4411038 //n[1] "Bootstrap 95% Confidence Interval: [0.382334925, 0.49404564]"

2.4 F_ROH summary

Model F_ROH Lower_CI Upper_CI
Empirical 0.27508 NA NA
Neutral 0.34118 0.29999 0.38932
s03 0.37504 0.34084 0.42136
s005 0.38518 0.36705 0.40664
s02 0.38906 0.34961 0.41289
s01 0.39011 0.32801 0.45229
s05 0.40342 0.36243 0.44244
s04 0.43447 0.39022 0.47333
s08 0.44110 0.38233 0.49405
s07 0.44944 0.42625 0.48390
s06 0.48566 0.41432 0.55072

2.4.1 Visualizaing F_ROH

## Models where empirical F_ROH is within CI:
## character(0)
## 
## Models where empirical F_ROH is outside CI:
##  [1] "s005"    "s01"     "s02"     "s03"     "s04"     "s05"     "s06"    
##  [8] "s07"     "s08"     "Neutral"

3: Expected Heterozygosity

3.1 Empirical data

3.2 Neutral Model

3.3 Selection Model

3.4 Expected Heterozygosity Summary

Model H_e_5th_percentile Lower_CI Upper_CI
s08 0.12414 0.10877 0.13782
s01 0.12632 0.11628 0.13635
s005 0.12664 0.11628 0.13700
s02 0.13061 0.11628 0.14753
Neutral 0.13338 0.12955 0.14040
s07 0.13473 0.11248 0.15188
s04 0.13676 0.11968 0.15384
s06 0.13681 0.12316 0.15346
s03 0.13703 0.12080 0.16008
s05 0.13928 0.13360 0.14547
Empirical NA NA NA

4: Summary

4.0 General comparison

4.0.1 ROH-hotspot threshold comparison

## 
##  ROH-hotspot threshold comparison between the different datasets
Model Avg_ROH_hotspot_threshold
Neutral 0.416
selection_model_s02_chr3 0.452
selection_model_s03_chr3 0.456
selection_model_s005_chr3 0.476
selection_model_s01_chr3 0.496
selection_model_s05_chr3 0.496
selection_model_s04_chr3 0.532
selection_model_s08_chr3 0.564
selection_model_s07_chr3 0.568
selection_model_s06_chr3 0.600
Empirical 0.750

4.0.1 F_ROH comparison

Model F_ROH Lower_CI Upper_CI
Empirical 0.27508 NA NA
Neutral 0.34118 0.29999 0.38932
s03 0.37504 0.34084 0.42136
s005 0.38518 0.36705 0.40664
s02 0.38906 0.34961 0.41289
s01 0.39011 0.32801 0.45229
s05 0.40342 0.36243 0.44244
s04 0.43447 0.39022 0.47333
s08 0.44110 0.38233 0.49405
s07 0.44944 0.42625 0.48390
s06 0.48566 0.41432 0.55072

4.1: Hotspot comparison

4.1.1: Selection test (Sweep test)

## [1] "Selection test results"
## [1] "ROH-hotspot windows with an mean H_e Value lower or equal to the fifth percentile of the neutral models average H_e are classified as being under selection"
## [1] "5th percentile of the neutral model is: 0.12912"
Name Under_selection Window_based_Average_H_e
Hotspot_chr3_window_1 Yes 0.1082216
Hotspot_chr3_window_3 Yes 0.1230482
Hotspot_chr3_window_2 No 0.1445314
Hotspot_chr17_window_1 No 0.1449621
Hotspot_chr17_window_2 No 0.1486567
Hotspot_chr19_window_1 No 0.1603723
## [1] "ROH-hotspots under selection:"
Name Under_selection Window_based_Average_H_e
Hotspot_chr3_window_1 Yes 0.1082216
Hotspot_chr3_window_3 Yes 0.1230482

4.1.2: Selection Strength Test (Sweep test)

4.1.1.1 Extracting Hotspots under selection

5 Visualizing Expected Heterozygoisty

5.1 Bootstrap CI for mean 5th percentile H_e

## Models where empirical H_e is within CI for Hotspot_chr3_window_1 :
## character(0)
## 
## Models where empirical H_e is outside CI for Hotspot_chr3_window_1 :
##  [1] "s005"    "s01"     "s02"     "s03"     "s04"     "s05"     "s06"    
##  [8] "s07"     "s08"     "Neutral"

## Models where empirical H_e is within CI for Hotspot_chr3_window_3 :
## [1] "s005" "s01"  "s02"  "s03"  "s04"  "s07"  "s08" 
## 
## Models where empirical H_e is outside CI for Hotspot_chr3_window_3 :
## [1] "s05"     "s06"     "Neutral"

## Models where empirical H_e is within CI for Hotspot_chr3_window_2 :
## [1] "s02" "s03" "s04" "s05" "s06" "s07"
## 
## Models where empirical H_e is outside CI for Hotspot_chr3_window_2 :
## [1] "s005"    "s01"     "s08"     "Neutral"

## Models where empirical H_e is within CI for Hotspot_chr17_window_1 :
## [1] "s02" "s03" "s04" "s05" "s06" "s07"
## 
## Models where empirical H_e is outside CI for Hotspot_chr17_window_1 :
## [1] "s005"    "s01"     "s08"     "Neutral"

## Models where empirical H_e is within CI for Hotspot_chr17_window_2 :
## [1] "s03" "s04" "s06" "s07"
## 
## Models where empirical H_e is outside CI for Hotspot_chr17_window_2 :
## [1] "s005"    "s01"     "s02"     "s05"     "s08"     "Neutral"

## Models where empirical H_e is within CI for Hotspot_chr19_window_1 :
## character(0)
## 
## Models where empirical H_e is outside CI for Hotspot_chr19_window_1 :
##  [1] "s005"    "s01"     "s02"     "s03"     "s04"     "s05"     "s06"    
##  [8] "s07"     "s08"     "Neutral"

5.2 5th percentile of the extreme H_e values

Selection_Coefficient H_e_Threshold
s005 0.11280
s01 0.11280
s02 0.11280
s03 0.11454
s04 0.11280
s06 0.11280
s07 0.09500
s08 0.10028
## 
## Hotspot Name: Hotspot_chr3_window_1

## 
## Hotspot Name: Hotspot_chr3_window_3